<!DOCTYPE html>
<!--[if IE 8]><html class="no-js lt-ie9" lang="zh-Hans" > <![endif]-->
<!--[if gt IE 8]><!--> <html class="no-js" lang="zh-Hans" > <!--<![endif]-->
<head>
  <!-- Document metadata: character encoding, IE rendering mode and responsive viewport. -->
  <meta charset="utf-8">
  <meta http-equiv="X-UA-Compatible" content="IE=edge">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">

  <link rel="shortcut icon" href="../img/favicon.ico">
  <title>优化器 Optimizers - Keras 中文文档</title>

  <!-- Stylesheets: web fonts, the local theme files, then the highlight.js colour scheme. -->
  <link href="https://fonts.googleapis.com/css?family=Lato:400,700|Roboto+Slab:400,700|Inconsolata:400,700" rel="stylesheet" type="text/css">
  <link rel="stylesheet" href="../css/theme.css" type="text/css">
  <link rel="stylesheet" href="../css/theme_extra.css" type="text/css">
  <!-- Explicit https: a protocol-relative URL inherits the page scheme and
       would resolve to file:// when the built docs are opened from disk. -->
  <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/styles/github.min.css">

  <script>
    // Current page data
    var mkdocs_page_name = "\u4f18\u5316\u5668 Optimizers";
    var mkdocs_page_input_path = "optimizers.md";
    var mkdocs_page_url = "/zh/optimizers/";
  </script>

  <script src="../js/jquery-2.1.1.min.js" defer></script>
  <script src="../js/modernizr-2.8.3.min.js" defer></script>
  <!-- highlight.js is intentionally NOT deferred: the inline call on the next
       line runs as soon as it is parsed and needs the `hljs` global to exist. -->
  <script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/highlight.min.js"></script>
  <script>hljs.initHighlightingOnLoad();</script>

  <!-- Google Analytics (analytics.js) loader: defines a `ga` stub that queues
       calls, injects the async analytics script before the first <script>
       element, then queues the tracker creation and a pageview hit. -->
  <script>
      (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
      (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
      m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
      })(window,document,'script','https://www.google-analytics.com/analytics.js','ga');

      ga('create', 'UA-61785484-1', 'keras.io');
      ga('send', 'pageview');
  </script>

</head>

<body class="wy-body-for-nav" role="document">

  <div class="wy-grid-for-nav">

    
    <nav data-toggle="wy-nav-shift" class="wy-nav-side stickynav">
      <div class="wy-side-nav-search">
        <a href=".." class="icon icon-home"> Keras 中文文档</a>
        <div role="search">
          <!-- Sidebar search: submits the query as `q` via GET to ../search.html. -->
          <form id="rtd-search-form" class="wy-form" action="../search.html" method="get">
            <!-- aria-label supplies the accessible name; a placeholder alone is not
                 a label. type="text" is kept so existing theme selectors still apply. -->
            <input type="text" name="q" placeholder="Search docs" title="Type search term here" aria-label="Search docs" />
          </form>
        </div>
      </div>

      <div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
	<ul class="current">
	  
          
            <li class="toctree-l1">
		
    <a class="" href="..">主页</a>
	    </li>
          
            <li class="toctree-l1">
		
    <a class="" href="../why-use-keras/">为什么选择 Keras?</a>
	    </li>
          
            <li class="toctree-l1">
		
    <span class="caption-text">快速开始</span>
    <ul class="subnav">
                <li class="">
                    
    <a class="" href="../getting-started/sequential-model-guide/">Sequential 顺序模型指引</a>
                </li>
                <li class="">
                    
    <a class="" href="../getting-started/functional-api-guide/">函数式 API 指引</a>
                </li>
                <li class="">
                    
    <a class="" href="../getting-started/faq/">FAQ 常见问题解答</a>
                </li>
    </ul>
	    </li>
          
            <li class="toctree-l1">
		
    <span class="caption-text">模型</span>
    <ul class="subnav">
                <li class="">
                    
    <a class="" href="../models/about-keras-models/">关于 Keras 模型</a>
                </li>
                <li class="">
                    
    <a class="" href="../models/sequential/">Sequential 顺序模型 API</a>
                </li>
                <li class="">
                    
    <a class="" href="../models/model/">函数式 API</a>
                </li>
    </ul>
	    </li>
          
            <li class="toctree-l1">
		
    <span class="caption-text">Layers</span>
    <ul class="subnav">
                <li class="">
                    
    <a class="" href="../layers/about-keras-layers/">关于 Keras 网络层</a>
                </li>
                <li class="">
                    
    <a class="" href="../layers/core/">核心网络层</a>
                </li>
                <li class="">
                    
    <a class="" href="../layers/convolutional/">卷积层 Convolutional</a>
                </li>
                <li class="">
                    
    <a class="" href="../layers/pooling/">池化层 Pooling</a>
                </li>
                <li class="">
                    
    <a class="" href="../layers/local/">局部连接层 Locally-connected</a>
                </li>
                <li class="">
                    
    <a class="" href="../layers/recurrent/">循环层 Recurrent</a>
                </li>
                <li class="">
                    
    <a class="" href="../layers/embeddings/">嵌入层 Embedding</a>
                </li>
                <li class="">
                    
    <a class="" href="../layers/merge/">融合层 Merge</a>
                </li>
                <li class="">
                    
    <a class="" href="../layers/advanced-activations/">高级激活层 Advanced Activations</a>
                </li>
                <li class="">
                    
    <a class="" href="../layers/normalization/">标准化层 Normalization</a>
                </li>
                <li class="">
                    
    <a class="" href="../layers/noise/">噪声层 Noise</a>
                </li>
                <li class="">
                    
    <a class="" href="../layers/wrappers/">层封装器 wrappers</a>
                </li>
                <li class="">
                    
    <a class="" href="../layers/writing-your-own-keras-layers/">编写你自己的层</a>
                </li>
    </ul>
	    </li>
          
            <li class="toctree-l1">
		
    <span class="caption-text">数据预处理</span>
    <ul class="subnav">
                <li class="">
                    
    <a class="" href="../preprocessing/sequence/">序列预处理</a>
                </li>
                <li class="">
                    
    <a class="" href="../preprocessing/text/">文本预处理</a>
                </li>
                <li class="">
                    
    <a class="" href="../preprocessing/image/">图像预处理</a>
                </li>
    </ul>
	    </li>
          
            <li class="toctree-l1">
		
    <a class="" href="../losses/">损失函数 Losses</a>
	    </li>
          
            <li class="toctree-l1">
		
    <a class="" href="../metrics/">评估标准 Metrics</a>
	    </li>
          
            <li class="toctree-l1 current">
		
    <a class="current" href="./">优化器 Optimizers</a>
    <ul class="subnav">
            
    <li class="toctree-l2"><a href="#_1">优化器的用法</a></li>
    

    <li class="toctree-l2"><a href="#keras">Keras 优化器的公共参数</a></li>
    
        <ul>
        
            <li><a class="toctree-l3" href="#sgd">SGD</a></li>
        
            <li><a class="toctree-l3" href="#rmsprop">RMSprop</a></li>
        
            <li><a class="toctree-l3" href="#adagrad">Adagrad</a></li>
        
            <li><a class="toctree-l3" href="#adadelta">Adadelta</a></li>
        
            <li><a class="toctree-l3" href="#adam">Adam</a></li>
        
            <li><a class="toctree-l3" href="#adamax">Adamax</a></li>
        
            <li><a class="toctree-l3" href="#nadam">Nadam</a></li>
        
        </ul>
    

    </ul>
	    </li>
          
            <li class="toctree-l1">
		
    <a class="" href="../activations/">激活函数 Activations</a>
	    </li>
          
            <li class="toctree-l1">
		
    <a class="" href="../callbacks/">回调函数 Callbacks</a>
	    </li>
          
            <li class="toctree-l1">
		
    <a class="" href="../datasets/">常用数据集 Datasets</a>
	    </li>
          
            <li class="toctree-l1">
		
    <a class="" href="../applications/">应用 Applications</a>
	    </li>
          
            <li class="toctree-l1">
		
    <a class="" href="../backend/">后端 Backend</a>
	    </li>
          
            <li class="toctree-l1">
		
    <a class="" href="../initializers/">初始化 Initializers</a>
	    </li>
          
            <li class="toctree-l1">
		
    <a class="" href="../regularizers/">正则化 Regularizers</a>
	    </li>
          
            <li class="toctree-l1">
		
    <a class="" href="../constraints/">约束 Constraints</a>
	    </li>
          
            <li class="toctree-l1">
		
    <a class="" href="../visualization/">可视化 Visualization</a>
	    </li>
          
            <li class="toctree-l1">
		
    <a class="" href="../scikit-learn-api/">Scikit-learn API</a>
	    </li>
          
            <li class="toctree-l1">
		
    <a class="" href="../utils/">工具</a>
	    </li>
          
            <li class="toctree-l1">
		
    <a class="" href="../contributing/">贡献</a>
	    </li>
          
            <li class="toctree-l1">
		
    <span class="caption-text">经典样例</span>
    <ul class="subnav">
                <li class="">
                    
    <a class="" href="../examples/addition_rnn/">Addition RNN</a>
                </li>
                <li class="">
                    
    <a class="" href="../examples/babi_rnn/">bAbI RNN</a>
                </li>
                <li class="">
                    
    <a class="" href="../examples/babi_memnn/">bAbI MemNN</a>
                </li>
                <li class="">
                    
    <a class="" href="../examples/cifar10_cnn/">CIFAR-10 CNN</a>
                </li>
                <li class="">
                    
    <a class="" href="../examples/cifar10_cnn_capsule/">CIFAR-10 CNN-Capsule</a>
                </li>
                <li class="">
                    
    <a class="" href="../examples/cifar10_cnn_tfaugment2d/">CIFAR-10 CNN with augmentation (TF)</a>
                </li>
                <li class="">
                    
    <a class="" href="../examples/cifar10_resnet/">CIFAR-10 ResNet</a>
                </li>
                <li class="">
                    
    <a class="" href="../examples/conv_filter_visualization/">Convolution filter visualization</a>
                </li>
                <li class="">
                    
    <a class="" href="../examples/image_ocr/">Image OCR</a>
                </li>
                <li class="">
                    
    <a class="" href="../examples/imdb_bidirectional_lstm/">Bidirectional LSTM</a>
                </li>
    </ul>
	    </li>
          
        </ul>
      </div>
      &nbsp;
    </nav>

    <section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">

      
      <nav class="wy-nav-top" role="navigation" aria-label="top navigation">
        <i data-toggle="wy-nav-top" class="fa fa-bars"></i>
        <a href="..">Keras 中文文档</a>
      </nav>

      
      <div class="wy-nav-content">
        <div class="rst-content">
          <div role="navigation" aria-label="breadcrumbs navigation">
  <ul class="wy-breadcrumbs">
    <li><a href="..">Docs</a> &raquo;</li>
    
      
    
    <li>优化器 Optimizers</li>
    <li class="wy-breadcrumbs-aside">
      
        <a href="https://github.com/keras-team/keras-docs-zh/edit/master/docs/optimizers.md"
          class="icon icon-github"> Edit on GitHub</a>
      
    </li>
  </ul>
  <hr/>
</div>
          <div role="main">
            <div class="section">
              
                <h2 id="_1">优化器的用法</h2>
<p>优化器 (optimizer) 是编译 Keras 模型所需的两个参数之一：</p>
<pre><code class="python">from keras import optimizers

model = Sequential()
model.add(Dense(64, kernel_initializer='uniform', input_shape=(10,)))
model.add(Activation('softmax'))

sgd = optimizers.SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(loss='mean_squared_error', optimizer=sgd)
</code></pre>

<p>你可以先实例化一个优化器对象，然后将它传入 <code>model.compile()</code>，像上述示例中一样，
或者你可以通过名称来调用优化器。在后一种情况下，将使用优化器的默认参数。</p>
<pre><code class="python"># 传入优化器名称: 默认参数将被采用
model.compile(loss='mean_squared_error', optimizer='sgd')
</code></pre>

<hr />
<h2 id="keras">Keras 优化器的公共参数</h2>
<p>参数 <code>clipnorm</code> 和 <code>clipvalue</code> 能在所有的优化器中使用，用于控制梯度裁剪（Gradient Clipping）：</p>
<pre><code class="python">from keras import optimizers

# 所有参数梯度将被裁剪，让其l2范数最大为1：g * 1 / max(1, l2_norm)
sgd = optimizers.SGD(lr=0.01, clipnorm=1.)
</code></pre>

<pre><code class="python">from keras import optimizers

# 所有参数的梯度将被裁剪到数值范围内：
# 最大值0.5
# 最小值-0.5
sgd = optimizers.SGD(lr=0.01, clipvalue=0.5)
</code></pre>

<hr />
<p><span style="float:right;"><a href="https://github.com/keras-team/keras/blob/master/keras/optimizers.py#L157">[source]</a></span></p>
<h3 id="sgd">SGD</h3>
<pre><code class="python">keras.optimizers.SGD(lr=0.01, momentum=0.0, decay=0.0, nesterov=False)
</code></pre>

<p>随机梯度下降优化器。</p>
<p>包含扩展功能的支持：</p>
<ul>
<li>动量（momentum）优化</li>
<li>学习率衰减（每次参数更新后）</li>
<li>Nesterov 动量 (NAG) 优化</li>
</ul>
<p><strong>参数</strong></p>
<ul>
<li><strong>lr</strong>: float &gt;= 0. 学习率。</li>
<li><strong>momentum</strong>: float &gt;= 0. 参数，用于加速 SGD 在相关方向上前进，并抑制震荡。</li>
<li><strong>decay</strong>: float &gt;= 0. 每次参数更新后学习率衰减值。</li>
<li><strong>nesterov</strong>: boolean. 是否使用 Nesterov 动量。</li>
</ul>
<hr />
<p><span style="float:right;"><a href="https://github.com/keras-team/keras/blob/master/keras/optimizers.py#L220">[source]</a></span></p>
<h3 id="rmsprop">RMSprop</h3>
<pre><code class="python">keras.optimizers.RMSprop(lr=0.001, rho=0.9, epsilon=None, decay=0.0)
</code></pre>

<p>RMSProp 优化器.</p>
<p>建议使用优化器的默认参数
（除了学习率 lr，它可以被自由调节）</p>
<p>这个优化器通常是训练循环神经网络RNN的不错选择。</p>
<p><strong>参数</strong></p>
<ul>
<li><strong>lr</strong>: float &gt;= 0. 学习率。</li>
<li><strong>rho</strong>: float &gt;= 0. RMSProp梯度平方的移动均值的衰减率.</li>
<li><strong>epsilon</strong>: float &gt;= 0. 模糊因子. 若为 <code>None</code>, 默认为 <code>K.epsilon()</code>。</li>
<li><strong>decay</strong>: float &gt;= 0. 每次参数更新后学习率衰减值。</li>
</ul>
<p><strong>参考文献</strong></p>
<ul>
<li><a href="http://www.cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf">rmsprop: Divide the gradient by a running average of its recent magnitude</a></li>
</ul>
<hr />
<p><span style="float:right;"><a href="https://github.com/keras-team/keras/blob/master/keras/optimizers.py#L288">[source]</a></span></p>
<h3 id="adagrad">Adagrad</h3>
<pre><code class="python">keras.optimizers.Adagrad(lr=0.01, epsilon=None, decay=0.0)
</code></pre>

<p>Adagrad 优化器。</p>
<p>Adagrad 是一种具有特定参数学习率的优化器，它根据参数在训练期间的更新频率进行自适应调整。参数接收的更新越多，更新越小。</p>
<p>建议使用优化器的默认参数。</p>
<p><strong>参数</strong></p>
<ul>
<li><strong>lr</strong>: float &gt;= 0. 学习率.</li>
<li><strong>epsilon</strong>: float &gt;= 0. 若为 <code>None</code>, 默认为 <code>K.epsilon()</code>.</li>
<li><strong>decay</strong>: float &gt;= 0. 每次参数更新后学习率衰减值.</li>
</ul>
<p><strong>参考文献</strong></p>
<ul>
<li><a href="http://www.jmlr.org/papers/volume12/duchi11a/duchi11a.pdf">Adaptive Subgradient Methods for Online Learning and Stochastic Optimization</a></li>
</ul>
<hr />
<p><span style="float:right;"><a href="https://github.com/keras-team/keras/blob/master/keras/optimizers.py#L353">[source]</a></span></p>
<h3 id="adadelta">Adadelta</h3>
<pre><code class="python">keras.optimizers.Adadelta(lr=1.0, rho=0.95, epsilon=None, decay=0.0)
</code></pre>

<p>Adadelta 优化器。</p>
<p>Adadelta 是 Adagrad 的一个具有更强鲁棒性的扩展版本，它不是累积所有过去的梯度，而是根据梯度更新的移动窗口调整学习速率。 
这样，即使进行了许多更新，Adadelta 仍在继续学习。 与 Adagrad 相比，在 Adadelta 的原始版本中，您无需设置初始学习率。 
在此版本中，与大多数其他 Keras 优化器一样，可以设置初始学习速率和衰减因子。</p>
<p>建议使用优化器的默认参数。</p>
<p><strong>参数</strong></p>
<ul>
<li><strong>lr</strong>: float &gt;= 0. 学习率，建议保留默认值。</li>
<li><strong>rho</strong>: float &gt;= 0. Adadelta梯度平方移动均值的衰减率。</li>
<li><strong>epsilon</strong>: float &gt;= 0. 模糊因子. 若为 <code>None</code>, 默认为 <code>K.epsilon()</code>。</li>
<li><strong>decay</strong>: float &gt;= 0. 每次参数更新后学习率衰减值。</li>
</ul>
<p><strong>参考文献</strong></p>
<ul>
<li><a href="http://arxiv.org/abs/1212.5701">Adadelta - an adaptive learning rate method</a></li>
</ul>
<hr />
<p><span style="float:right;"><a href="https://github.com/keras-team/keras/blob/master/keras/optimizers.py#L436">[source]</a></span></p>
<h3 id="adam">Adam</h3>
<pre><code class="python">keras.optimizers.Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0, amsgrad=False)
</code></pre>

<p>Adam 优化器。</p>
<p>默认参数遵循原论文中提供的值。</p>
<p><strong>参数</strong></p>
<ul>
<li><strong>lr</strong>: float &gt;= 0. 学习率。</li>
<li><strong>beta_1</strong>: float, 0 &lt; beta &lt; 1. 通常接近于 1。</li>
<li><strong>beta_2</strong>: float, 0 &lt; beta &lt; 1. 通常接近于 1。</li>
<li><strong>epsilon</strong>: float &gt;= 0. 模糊因子. 若为 <code>None</code>, 默认为 <code>K.epsilon()</code>。</li>
<li><strong>decay</strong>: float &gt;= 0. 每次参数更新后学习率衰减值。</li>
<li><strong>amsgrad</strong>: boolean. 是否应用此算法的 AMSGrad 变种，来自论文 "On the Convergence of Adam and Beyond"。</li>
</ul>
<p><strong>参考文献</strong></p>
<ul>
<li><a href="http://arxiv.org/abs/1412.6980v8">Adam - A Method for Stochastic Optimization</a></li>
<li><a href="https://openreview.net/forum?id=ryQu7f-RZ">On the Convergence of Adam and Beyond</a></li>
</ul>
<hr />
<p><span style="float:right;"><a href="https://github.com/keras-team/keras/blob/master/keras/optimizers.py#L527">[source]</a></span></p>
<h3 id="adamax">Adamax</h3>
<pre><code class="python">keras.optimizers.Adamax(lr=0.002, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0)
</code></pre>

<p>Adamax 优化器，来自 Adam 论文的第七小节.</p>
<p>它是Adam算法基于无穷范数（infinity norm）的变种。
默认参数遵循论文中提供的值。</p>
<p><strong>参数</strong></p>
<ul>
<li><strong>lr</strong>: float &gt;= 0. 学习率。</li>
<li><strong>beta_1/beta_2</strong>: floats, 0 &lt; beta &lt; 1. 通常接近于 1。</li>
<li><strong>epsilon</strong>: float &gt;= 0. 模糊因子. 若为 <code>None</code>, 默认为 <code>K.epsilon()</code>。</li>
<li><strong>decay</strong>: float &gt;= 0. 每次参数更新后学习率衰减值。</li>
</ul>
<p><strong>参考文献</strong></p>
<ul>
<li><a href="http://arxiv.org/abs/1412.6980v8">Adam - A Method for Stochastic Optimization</a></li>
</ul>
<hr />
<p><span style="float:right;"><a href="https://github.com/keras-team/keras/blob/master/keras/optimizers.py#L605">[source]</a></span></p>
<h3 id="nadam">Nadam</h3>
<pre><code class="python">keras.optimizers.Nadam(lr=0.002, beta_1=0.9, beta_2=0.999, epsilon=None, schedule_decay=0.004)
</code></pre>

<p>Nesterov 版本 Adam 优化器。</p>
<p>正像 Adam 本质上是 RMSProp 与动量 momentum 的结合，
Nadam 是采用 Nesterov momentum 版本的 Adam 优化器。</p>
<p>默认参数遵循论文中提供的值。
建议使用优化器的默认参数。</p>
<p><strong>参数</strong></p>
<ul>
<li><strong>lr</strong>: float &gt;= 0. 学习率。</li>
<li><strong>beta_1/beta_2</strong>: floats, 0 &lt; beta &lt; 1. 通常接近于 1。</li>
<li><strong>epsilon</strong>: float &gt;= 0. 模糊因子. 若为 <code>None</code>, 默认为 <code>K.epsilon()</code>。</li>
</ul>
<p><strong>参考文献</strong></p>
<ul>
<li><a href="http://cs229.stanford.edu/proj2015/054_report.pdf">Nadam report</a></li>
<li><a href="http://www.cs.toronto.edu/~fritz/absps/momentum.pdf">On the importance of initialization and momentum in deep learning</a></li>
</ul>
              
            </div>
          </div>
          <footer>
  
    <div class="rst-footer-buttons" role="navigation" aria-label="footer navigation">
      
        <a href="../activations/" class="btn btn-neutral float-right" title="激活函数 Activations">Next <span class="icon icon-circle-arrow-right"></span></a>
      
      
        <a href="../metrics/" class="btn btn-neutral" title="评估标准 Metrics"><span class="icon icon-circle-arrow-left"></span> Previous</a>
      
    </div>
  

  <hr/>

  <div role="contentinfo">
    <!-- Copyright etc -->
    
  </div>

  Built with <a href="http://www.mkdocs.org">MkDocs</a> using a <a href="https://github.com/snide/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>.
</footer>
      
        </div>
      </div>

    </section>

  </div>

  <div class="rst-versions" role="note" style="cursor: pointer">
    <span class="rst-current-version" data-toggle="rst-current-version">
      
          <a href="https://github.com/keras-team/keras-docs-zh/" class="fa fa-github" style="float: left; color: #fcfcfc"> GitHub</a>
      
      
        <span><a href="../metrics/" style="color: #fcfcfc;">&laquo; Previous</a></span>
      
      
        <span style="margin-left: 15px"><a href="../activations/" style="color: #fcfcfc">Next &raquo;</a></span>
      
    </span>
</div>
    <!-- base_url is the relative path from this page to the site root.
         NOTE(review): presumably read by the deferred theme/search scripts
         below; confirm against those files before renaming or moving it. -->
    <script>var base_url = '..';</script>
    <script src="../js/theme.js" defer></script>
      <script src="../search/main.js" defer></script>

</body>
</html>
